
#######################################################################################################################
# Code for JEPS replication files
# October 8, 2015
# Cooperating With the State: Evidence from Survey Experiments on Policing
#
#######################################################################################################################

rm(list=ls())

library(foreign)
library(xtable)
library(memisc)
library(reporttools)
library(stargazer)

dta <- read.dta("CWTS prepared data.dta")

setwd("output/")

# Rename outcome and treatment variables to something that makes more sense and matches the order that we present these experiments in the paper (they were three of eight experiments in the survey instrument, differently ordered)
dta$y_se1 <- dta$full1
dta$y_se2 <- dta$full5
dta$y_se3 <- dta$full2
dta$treat_se1 <- dta$q1var
dta$treat_se2 <- dta$q5var
dta$treat_se3 <- dta$q2var

attach(dta)

# Generate binary indicator for item non-response
q1.nr <- recode(y_se1, 1 <- NA, 0 <- c(1,2,3,4,5))
q2.nr <- recode(y_se2, 1 <- NA, 0 <- c(1,2,3,4,5))
q3.nr <- recode(y_se3, 1 <- NA, 0 <- c(1,2,3,4,5))


#######################################################################################################################
# FIGURES 1-3 #########################################################################################################
#######################################################################################################################

# Parameters: outcome variable; 4-way treatment indicator; experiment number; text for legend; legend text size; legend location
superplot <- function(data, grouper, num, legendvec, cex1, horiz, vert){
	xx <- yy <- matrix(NA,4,5)
	prp.na <- num.na <- rep(NA,4)

	# Set up histogram data for each treatment condition
	for (qq in 1:4) {
		h <- hist(na.omit(data[grouper==qq]), plot=FALSE)
		diffBreaks <- h$mids[2] - h$mids[1]
		tmp1 <- c(h$mids[1]-diffBreaks, h$mids, tail(h$mids,1)+diffBreaks )
		tmp2 <- c(0, h$density/2, 0)
		xx[qq,] <- round(tmp1[tmp2>0])
		yy[qq,] <- tmp2[tmp2>0]
		num.na[qq] <- length(which(is.na(data[grouper==qq])==T))
		prp.na[qq] <- num.na[qq]/(num.na[qq]+sum(h$count))
		}
		
	xx.m <- cbind(c(0,0,0,0),xx)
	means <- c(mean(na.omit(data[grouper ==1])),mean(na.omit(data[grouper ==2])),mean(na.omit(data[grouper ==3])),mean(na.omit(data[grouper ==4])))
	factor <- max(yy)/5
	means <- means*factor
	yy.m <- cbind(c(NA,NA,NA,NA),yy)
	
	# Open pdf and plot first treatment group
	pdf(file=paste("q",num,"super1.pdf",sep=""))
	par(mar=c(5,4,4,1), family="")  # CMU Serif
	plot(xx.m[1,], yy.m[1,], lwd=2, col="blue",type="o",pch=19,ylim=c(0,0.6),ylab="Proportion of non-missing responses",xlab="Response (\"Completely unlikely\" to \"For certain\")",xaxt="n",xlim=c(-0.1,5.1),bty="n",main=paste("Question ",num," Responses, by Experimental Group",sep=""),lty=1,cex=1.2)
	mtext("How likely are you to report this crime to the police?")
	axis(1,at=c(seq(0,5.2,1)),labels=c("NR",1,2,3,4,5))

	# Plot remaining three treatment groups, non-response points, legend; close pdf
	lines(xx.m[2,], yy.m[2,],lwd=2,col="red",type="o",pch=1,lty=2,cex=1.2)
	lines(xx.m[3,], yy.m[3,],lwd=2,col="green",type="o",pch=17,lty=3,cex=1.2)
	lines(xx.m[4,], yy.m[4,],lwd=2,col="darkgray",type="o",pch=2,lty=4,cex=1.2)
	points(rep(0,4),prp.na,pch=c(19,1,17,2),cex=1.2,col=c("blue","red","green","darkgray"), lwd=2)
	legend(horiz,vert,col=c("blue","red","green","darkgray"),lwd=2,legend=legendvec,cex=cex1,pch=c(19,1,17,2),lty=c(1,2,3,4))
	dev.off()
}

# Run function three times, will automatically output pdf files
superplot(y_se1, treat_se1, 1, c("G1: Police officer stealing","G2: Police officer beating","G3: Stranger stealing","G4: Stranger beating"), 0.9, -0.0, 0.58)  # 3, 0.12
superplot(y_se3, treat_se3, 3, c("G1: Low-value robbery","G2: Low-value robbery + \"busy\" frame","G3: High-value robbery","G4: High-value robbery + \"busy\" frame"), 0.9, 0, 0.58)
superplot(y_se2, treat_se2, 2, c("G1: Baseline","G2: Civic-duty frame","G3: Reward","G4: Civic-duty frame + reward"), 0.9, 0, 0.58)


#######################################################################################################################
# TABLES 1-3 ##########################################################################################################
#######################################################################################################################

# Parameters: outcome variable; 4-way treatment indicator; null
tmat <- function(qdata,qvar,qnum){
	tmat.1 <- matrix(NA,6,3)

	# Calculate means and SDs for each of 4 treatment indicators, store in matrix
	tmat.1[1,1] <- mean(qdata[qvar==1],na.rm=T)
	tmat.1[2,1] <- sd(qdata[qvar==1],na.rm=T)
	tmat.1[3,1] <- mean(qdata[qvar==2],na.rm=T)
	tmat.1[4,1] <- sd(qdata[qvar==2],na.rm=T)
	tmat.1[1,2] <- mean(qdata[qvar==3],na.rm=T)
	tmat.1[2,2] <- sd(qdata[qvar==3],na.rm=T)
	tmat.1[3,2] <- mean(qdata[qvar==4],na.rm=T)
	tmat.1[4,2] <- sd(qdata[qvar==4],na.rm=T)
	
	# Calculate t-test differences in means, SEs
	d.13 <- t.test(qdata[qvar==1 | qvar==3] ~ qvar[qvar==1 | qvar==3])
	d.12 <- t.test(qdata[qvar==1 | qvar==2] ~ qvar[qvar==1 | qvar==2])
	d.24 <- t.test(qdata[qvar==2 | qvar==4] ~ qvar[qvar==2 | qvar==4])
	d.34 <- t.test(qdata[qvar==4 | qvar==3] ~ qvar[qvar==4 | qvar==3])
	d.14 <- t.test(qdata[qvar==4 | qvar==1] ~ qvar[qvar==4 | qvar==1])
	
	# Store t-test output into matrix
	tmat.1[1,3] <- d.13$est[2]-d.13$est[1]
	tmat.1[2,3] <- -tmat.1[1,3]/d.13$stat
	tmat.1[5,1] <- d.12$est[2]-d.12$est[1]
	tmat.1[6,1] <- -tmat.1[5,1]/d.12$stat
	tmat.1[3,3] <- d.24$est[2]-d.24$est[1]
	tmat.1[4,3] <- -tmat.1[3,3]/d.24$stat
	tmat.1[5,2] <- d.34$est[2]-d.34$est[1]
	tmat.1[6,2] <- -tmat.1[5,2]/d.34$stat
	tmat.1[5,3] <- d.14$est[2]-d.14$est[1]
	tmat.1[6,3] <- -tmat.1[5,3]/d.14$stat
	
	# Output matrix in nice format, xtable to tex
	tmat.1 <- matrix(tmat.1,6,3,dimnames=list(rows=c("Treatment A1","SD","Treatment A2","SD","Diff in means","SE"),cols=c("Treatment B1","Treatment B2","Diff in means")))
	tmat.out <- xtable(tmat.1,digits=2,align=c("l","c","c","c"))
	
	return(tmat.out)
}

# Run function three times, appending output into one file
print(tmat(y_se1,treat_se1),type="latex",file="ttestout1.tex")
print(tmat(y_se3,treat_se3),type="latex",file="ttestout1.tex",append=T)
print(tmat(y_se2,treat_se2),type="latex",file="ttestout1.tex",append=T)



#######################################################################################################################
# FIGURE A1: Balance tests ############################################################################################
#######################################################################################################################

# Specify covariates we are looking for balance over; give nice labels
balvars <- cbind(age,male,material,edu,contact_any,crimein12mo,polstaknow,newmoscow,russian)
qnames<- list(c("Age","Male","Material security","Education","Interaction with police","Occurrence of crime in past 12 months","Know location of nearest police station","New to Moscow","Russian nationality"),c("Question 1","Question 2","Question 3"))

# Empty matrix to store all t-test p-values in
balmat.tfull.flat <- matrix(NA, dim(balvars)[2],9, dimnames=list(qnames[[1]], NULL))
for (ix in 1:dim(balvars)[2]){
	oneq <- balvars[,ix]
	balmat.tfull.flat[ix,] <- c(t.test(oneq ~ beating)$p.val, t.test(oneq ~ stranger)$p.val, t.test(oneq ~ strangerXbeating)$p.val, t.test(oneq ~ duty)$p.val, t.test(oneq ~ reward)$p.val, t.test(oneq ~ dutyXreward)$p.val, t.test(oneq ~ busy)$p.val, t.test(oneq ~ highval)$p.val, t.test(oneq ~ busyXhighval)$p.val)
}

# Open pdf file, plot all p values using a variety of characters
cx <- 1.5
pdf(file="balanceplot.pdf", width=14)
par(mar=c(4,1,1,1))
plot(balmat.tfull.flat[,9], seq(1,dim(balmat.tfull.flat)[1]), pch=16, xlim=c(-0.5,1), frame.plot=F, axes=F, ylab="", xlab=expression(paste(italic("p"),"-value from ",italic("t"),"-test",sep="")), cex=cx)
points(balmat.tfull.flat[,8], seq(1,dim(balmat.tfull.flat)[1]), pch=17, cex=cx)
points(balmat.tfull.flat[,7], seq(1,dim(balmat.tfull.flat)[1]), pch=3, cex=cx)
points(balmat.tfull.flat[,6], seq(1,dim(balmat.tfull.flat)[1]), pch=8, cex=cx)
points(balmat.tfull.flat[,5], seq(1,dim(balmat.tfull.flat)[1]), pch=10, cex=cx)
points(balmat.tfull.flat[,4], seq(1,dim(balmat.tfull.flat)[1]), pch=21, cex=cx)
points(balmat.tfull.flat[,3], seq(1,dim(balmat.tfull.flat)[1]), pch=22, cex=cx)
points(balmat.tfull.flat[,2], seq(1,dim(balmat.tfull.flat)[1]), pch=23, cex=cx)
points(balmat.tfull.flat[,1], seq(1,dim(balmat.tfull.flat)[1]), pch=24, cex=cx)
abline(v=0.05, lty=2)
abline(v=0.1, lty=3)
text(-0.5, seq(1,dim(balmat.tfull.flat)[1]), labels=rev(qnames[[1]]), cex=0.9, adj=0)
axis(1, at=seq(0,1,0.1))
legend(-0.29, 9, legend=c("Q1 Beating", "Q1 Stranger", "Q1 Beating x Stranger", "Q2 Duty", "Q2 Reward", "Q2 Duty x Reward", "Q3 Busy", "Q3 High value", "Q3 Busy x High value"), pch=c(16, 17, 3, 8, 10, 21, 22, 23, 24), cex=0.9)
dev.off()


#######################################################################################################################
# TABLE A1: Non-response basics and correlations ######################################################################
#######################################################################################################################

nrtable <- cor(cbind(q1.nr, q2.nr, q3.nr))
nrtable <- cbind(c(mean(q1.nr), mean(q2.nr), mean(q3.nr)), nrtable)

print(xtable(nrtable,digits=3,align=c("l","c","c","c","c")),type="latex",file="nonresponsebasics1.tex")


#######################################################################################################################
# TABLE A2: Item response rates by demographics; table of non-response ################################################
#######################################################################################################################

# Run logit models with and without covariates
nrols.q1 <- glm(q1.nr ~ stranger + beating, family=binomial)
nrols.q1.2 <- glm(q1.nr ~ stranger + beating + age + male + material + edu + contact_any + crimein12mo + polstaknow + newmoscow + russian, family=binomial)

nrols.q2 <- glm(q2.nr ~ duty + reward, family=binomial)
nrols.q2.2 <- glm(q2.nr ~ duty + reward + age + male + material + edu + contact_any + crimein12mo + polstaknow + newmoscow + russian, family=binomial)

nrols.q3 <- glm(q3.nr ~ busy + highval, family=binomial)
nrols.q3.2 <- glm(q3.nr ~ busy + highval + age + male + material + edu + contact_any + crimein12mo + polstaknow + newmoscow + russian, family=binomial)

# Output nice latex file with these six models
stargazer(nrols.q1, nrols.q1.2, nrols.q2, nrols.q2.2, nrols.q3, nrols.q3.2, type="latex", style="apsr", covariate.labels=c("Stranger", "Beating","Age/100","Male","Material security","Education","Interaction with police","Occurrence of crime in past 12 months","Know location of nearest police station","New to Moscow","Russian nationality", "Civic duty", "Reward", "Busy", "High value"), font.size="scriptsize", label="nonresponsetable1", dep.var.labels=c("Question 1","Question 2","Question 3"), out="nonresponsetable1.tex", omit.stat=c("aic", "ser" ,"f"), table.placement="h!", title="Item Non-Response by Question", column.labels="Item Non-Reponse")



#######################################################################################################################
# TABLE A3: Item response rates by stratification #####################################################################
#######################################################################################################################

# Calculate mean non-response for each question in each okrug, raion, okrugXraion (for nice output later)
q1o <- stats:::aggregate.formula(q1.nr ~ okrug, data=dta, mean)
q1p <- stats:::aggregate.formula(q1.nr ~ raion, data=dta, mean)
q1op <- stats:::aggregate.formula(q1.nr ~ okrug + raion, data=dta, mean)

q2o <- stats:::aggregate.formula(q2.nr ~ okrug, data=dta, mean)
q2p <- stats:::aggregate.formula(q2.nr ~ raion, data=dta, mean)
q2op <- stats:::aggregate.formula(q2.nr ~ okrug + raion, data=dta, mean)

q3o <- stats:::aggregate.formula(q3.nr ~ okrug, data=dta, mean)
q3p <- stats:::aggregate.formula(q3.nr ~ raion, data=dta, mean)
q3op <- stats:::aggregate.formula(q3.nr ~ okrug + raion, data=dta, mean)

# Get sample size in each okrug and raion
okrugsum <- stats:::aggregate.formula(rep(1, length(okrug)) ~ okrug, data=dta, sum)
raionsum <- stats:::aggregate.formula(rep(1, length(okrug)) ~ raion, data=dta, sum)

# Merge all this stuff above together in the right order so we end up with one nice table
raionfull <- merge(raionsum, q1op, by="raion")
raionfull <- merge(raionfull, q2p, by="raion")
raionfull <- merge(raionfull, q3p, by="raion")
okrugfull <- merge(okrugsum, q1o, by="okrug")
okrugfull <- merge(okrugfull, q2o, by="okrug")
okrugfull <- merge(okrugfull, q3o, by="okrug")
fulltable <- merge(okrugfull, raionfull, by="okrug")

# Output this table, rename columns, print as latex using xtable
fulltable[,1] <- as.numeric(fulltable[,1])
colnames(fulltable) <- c("Okrug", "Sampled N", "SE1 NR", "SE2 NR", "SE3 NR", "Region ID", "Sampled N", "SE1 NR", "SE2 NR", "SE3 NR")
print(xtable(fulltable, digits=c(0,0,0,3,3,3,0,0,3,3,3),align=c("cllcccllccc")),type="latex", include.rownames=F, table.placement="htb!", size="scriptsize")


#######################################################################################################################
# TABLE A7: Spillovers ################################################################################################
#######################################################################################################################

# Run six OLS models with possible combinations of outcome variables and treatment indicators from earlier experiments
# Don't need to look for spillovers with SE1 since it was first
spill.q2 <- lm(y_se2 ~ stranger + beating + busy + highval)
spill.q2.2 <- lm(y_se2 ~ stranger + beating + busy + highval + q3var + q4var)
q2reg.spill <- lm(y_se2 ~ duty*reward + stranger + beating + busy + highval)
q2reg.spill.2 <- lm(y_se2 ~ duty*reward + stranger + beating + busy + highval + q3var + q4var)
spill.q3 <- lm(y_se3 ~ stranger + beating)
q3reg.spill <- lm(y_se3 ~ busy*highval + stranger + beating)

# Output all six in nice latex table
stargazer(spill.q2, spill.q2.2, q2reg.spill, q2reg.spill.2, spill.q3, q3reg.spill, type="latex", style="apsr", covariate.labels=c("Civic Duty (SE2)", "Reward (SE2)", "Stranger (SE1)", "Beating (SE1)", "Busy X High Value (SE1)", "Busy (SE3)", "High Value (SE3)", "Intervening Treatment 1", "Intervening Treatment 2", "Civic Duty X Reward (SE2)"), font.size="footnotesize", label="spillovers", dep.var.labels=c(rep("SE2 Outcome",4), "SE3 Outcome", "SE3 Outcome"), out="spillovers.tex", omit.stat=c("aic", "ser" ,"f"), table.placement="h!", title="SE2 and SE3 Spillovers", notes.align="c", notes=c("Dependent variables are outcomes for Survey Experiments (SE) 2 and 3, as noted at the top of the table.", "Predictor variables are marked according to which SE they are treatments for."))



#######################################################################################################################
# TABLE A8: Descriptive statistics of covariates ######################################################################
#######################################################################################################################

balvars <- data.frame(age, male, material, edu, contact_any, crimein12mo, polstaknow, newmoscow, russian)

tableContinuous(vars=balvars, stats=c("n", "min", "max", "median", "mean", "s", "na"), prec=2, col.tit=NA, cap="Covariate Descriptive Statistics", lab="descriptive_covariates", font.size="footnotesize")



